//package com.office.utils.webmagic;
//
//import com.office.dataobject.WebmagicCircles;
//import com.office.utils.DBUtils;
//import us.codecraft.webmagic.Page;
//import us.codecraft.webmagic.Site;
//import us.codecraft.webmagic.Spider;
//import us.codecraft.webmagic.processor.PageProcessor;
//
//import java.util.List;
//
///**
// * Fetches company data from Tianyancha (天眼查).
// * webmagic-core jar: re-download it from GitHub and package it into the local repository.
// */
//public class TycPageProcessor implements PageProcessor {
//
//    /**
//     * The sleep time is set to 20000 ms; with a shorter interval the crawler is treated as a robot.
//     * https://www.tianyancha.com/search/or0100-e015-s2-t1-ot1?base=shenzhen
//     */
//    private Site site = Site.me().setRetryTimes(3).setSleepTime(20000).setTimeOut(10 * 1000).setCharset("UTF-8")
//            .addHeader("Content-Type", "application/x-www-form-urlencoded")
//            .setUserAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36")
//            .addCookie("dsdfjs", "Cookie: jsid=SEM-BAIDU-PZ1907-SY-000100; TYCID=0a0bfaa00a9a11ea852293c454e936e2; undefined=0a0bfaa00a9a11ea852293c454e936e2; ssuid=4681776936; _ga=GA1.2.2097079583.1574146730; _gid=GA1.2.1987408782.1574146730; RTYCID=efc454f87b8646238a01a078d02be65b; CT_TYCID=794f7367030949b485e34260a6f4f778; __insp_slim=1574153412525; __insp_wid=677961980; __insp_nv=true; __insp_targlpu=aHR0cHM6Ly93d3cudGlhbnlhbmNoYS5jb20vY2xhaW0vZW50cnk%3D; __insp_targlpt=5LyB5Lia6K6k6K_BIC0g5aSp55y85p_l; __insp_norec_howoften=true; __insp_norec_sess=true; aliyungf_tc=AQAAAHh1ixOCSQAAt8F0cVdCRyTONVRQ; csrfToken=Z8SYgoiNPPVDBoJUE8HRWet2; Hm_lvt_e92c8d65d92d534b0fc290df538b4758=1574146730,1574219381; token=71c4e47ae4f74ab280722d6f456945cf; _utm=445b432cd4d94ca383ecb9b7245683ec; bannerFlag=true; tyc-user-info=%257B%2522claimEditPoint%2522%253A%25220%2522%252C%2522myAnswerCount%2522%253A%25220%2522%252C%2522myQuestionCount%2522%253A%25220%2522%252C%2522signUp%2522%253A%25220%2522%252C%2522explainPoint%2522%253A%25220%2522%252C%2522privateMessagePointWeb%2522%253A%25220%2522%252C%2522nickname%2522%253A%2522%25E4%25BC%258A%25E5%2588%25A9%25E4%25BA%259A%25C2%25B7%25E4%25BC%258D%25E5%25BE%25B7%2522%252C%2522integrity%2522%253A%25220%2525%2522%252C%2522privateMessagePoint%2522%253A%25220%2522%252C%2522state%2522%253A%25220%2522%252C%2522announcementPoint%2522%253A%25220%2522%252C%2522isClaim%2522%253A%25220%2522%252C%2522bidSubscribe%2522%253A%2522-1%2522%252C%2522vipManager%2522%253A%25220%2522%252C%2522discussCommendCount%2522%253A%25220%2522%252C%2522monitorUnreadCount%2522%253A%25220%2522%252C%2522onum%2522%253A%25220%2522%252C%2522claimPoint%2522%253A%25220%2522%252C%2522token%2522%253A%2522eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNTk4OTU0OTQ0OCIsImlhdCI6MTU3NDI0NDgxNCwiZXhwIjoxNjA1NzgwODE0fQ.N4uPKd9zlohaf2TBEyOj5-80BPB5Wq-sJGuG0dNY-dk-WTCu-5zi3Ny6jf1iIG9oKMX93cRcQbkxjjuotKs15A%2522%252C%2522pleaseAnswerCount%2522%253A%25220%2522%252C%2522redPoint%2522%253A%25220%2522%252C%
//2522bizCardUnread%2522%253A%25220%2522%252C%2522vnum%2522%253A%25220%2522%252C%2522mobile%2522%253A%252215989549448%2522%257D; auth_token=eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNTk4OTU0OTQ0OCIsImlhdCI6MTU3NDI0NDgxNCwiZXhwIjoxNjA1NzgwODE0fQ.N4uPKd9zlohaf2TBEyOj5-80BPB5Wq-sJGuG0dNY-dk-WTCu-5zi3Ny6jf1iIG9oKMX93cRcQbkxjjuotKs15A; _gat_gtag_UA_123487620_1=1; Hm_lpvt_e92c8d65d92d534b0fc290df538b4758=1574244950; cloud_token=5effaafecee54658950940f1958a2344; cloud_utm=40bf410e1e034fa2ac6f2957fc81b19b");
//
//    public static void main(String[] args) {
//        for (int i = 1; i < 5; i++) {
//            String url = "https://www.tianyancha.com/search/or0100-e015-s2-t1-ot1/p" + i + "?base=shenzhen";
//            Spider.create(new TycPageProcessor()).addUrl(url).thread(1).run();
//        }
//
//    }
//
//    @Override
//    public void process(Page page) {
//        List<String> urls = page.getHtml().xpath("//div[@id='web-content']/div/div[1]/div[2]/div[2]/*/div/div[3]/div[1]/a").links().all();
//        //*[@id="web-content"]/div/div[1]/div[2]/div[2]/div[2]/div/div[3]/div[1]/a
//        page.addTargetRequests(urls);
//        String companyNames = page.getHtml().xpath("//div[@id='company_web_top']/div[2]/div[3]/div[1]/h1/text()").toString();
//        String unitCodes = page.getHtml().xpath("//div[@id=\"_container_baseInfo\"]/table[2]/tbody/tr[3]/td[2]/text()").toString();
//        String contracUsers = page.getHtml().xpath("//div[@id='_container_baseInfo']/table[1]/tbody/tr[1]/td[1]/div/div[1]/div[2]/div[1]/a/text()").toString();//div[@id='_container_baseInfo']/table[1]/tbody/tr[1]/td[1]/div/div[1]/div[2]/div[1]/a
//        String phones = page.getHtml().xpath("//div[@id=\"company_web_top\"]/div[2]/div[3]/div[3]/div[1]/div[1]/span[2]/text()").toString();
//        String prices = page.getHtml().xpath("//div[@id=\"_container_baseInfo\"]/table[2]/tbody/tr[1]/td[2]/div/text()").toString();
//        String birthdays = page.getHtml().xpath("//div[@id=\"_container_baseInfo\"]/table[2]/tbody/tr[2]/td[2]/div/text()").toString();
//
//        System.out.println(companyNames);
//        WebmagicCircles circles = new WebmagicCircles();
//        circles.setCompanyName(null == companyNames ? "" : companyNames);
//        circles.setUnitCode(null == unitCodes ? "" : unitCodes);
//        circles.setContactsUser(null == contracUsers ? "" : contracUsers);
//        circles.setPhone(null == phones ? "" : phones);
//        circles.setPrice(null == prices ? "" : prices);
//        circles.setBirthday(null == birthdays ? "" : birthdays);
//        DBUtils.insertTable(circles);
//    }
//
//    @Override
//    public Site getSite() {
//        return site;
//    }
//}
